load requirements
rm(list = ls())
{
library(stringr)
library(dplyr)
library(plyr)
library(magrittr)
library(survival)
library(grid)
library(forestploter)
library(openxlsx)
library(pROC)
library(rms)
library(ggsci)
library(RColorBrewer)
library(timeROC)
library(randomForest)
library(tidyverse)
library(ggplot2)
library(ggthemes)
library(ggpubr)
}
Correlation between Clinical parameters and MS
clinic <- read.table(file = './00data_preparation/ACC_clinic.tsv',sep = '\t',header=T)
variable <- c('SEX',
'AGE_new',
# 'RACE',
'Ethnicity.Race',
'Clinical.Stage',
'T.stage',
'Lymph.node',
'ATYPICAL_MITOTIC_FIGURES',
'CAPSULAR_INVASION',
'Clinical.Status.3.Mo.Post.Op',
'Distant.Metastasis',
'CYTOPLASM_PRESENCE_LESS_THAN_EQUAL_25_PERCENT',
'DIFFUSE_ARCHITECTURE',
'Excessive.Hormone',
'LATERALITY',
'METASTATIC_SITE_1',
'METASTATIC_SITE_2',
'MITOTIC_RATE',
'NECROSIS',
'NUCLEAR_GRADE',
'PHARMACEUTICAL_TX_ADJUVANT',
'PHARM_TX_MITOTANE_ADJUVANT',
'PHARM_TX_MITOTANE_THERAPUTIC_AT_REC',
'RADIATION_TREATMENT_ADJUVANT',
'Surgical.margin',
'SINUSOID_INVASION',
'PRIMARY_THERAPY_OUTCOME',
'Neoplasm.Status',
'WEISS_VENOUS_INVASION',
'TUMOR_WEIGHT_new')
data_type <- c('Without_combined',"Likely_combined","PC_combined","Putative_combined",'Strigent_combined')
# Tables
result_list <- list()
for (j in 1:5) {
result <- data.frame()
for (i in 1:length(variable)) {
name <- variable[i]
clinic <- dplyr::mutate(clinic,MS=clinic[,data_type[j]])
dat <- data.frame(table(clinic[,c('MS',name)])) %>%
ddply(.(MS),transform,percent=Freq/sum(Freq)*100)
colnames(dat)[1:2] <- c('Var1','Var2')
x <- matrix(dat$Freq,ncol=2) %>% as.data.frame() %>% set_colnames(c('MS1','MS2'))
x <- tibble::rownames_to_column(x,var = 'parameter') %>% dplyr::mutate(parameter=unique(dat$Var2))
result <- rbind(result,x)
}
result_list[[j]] <- result
}
Cor_MS_and_Clin_NR <- result_list[[1]];knitr::kable(Cor_MS_and_Clin_NR)
| Female |
34 |
14 |
| Male |
21 |
8 |
| <60 |
42 |
18 |
| >60 |
13 |
4 |
| NOT WHITE |
2 |
0 |
| WHITE |
49 |
16 |
| Stage I |
6 |
3 |
| Stage II |
24 |
13 |
| Stage III |
11 |
5 |
| Stage IV |
14 |
1 |
| T1 |
6 |
3 |
| T2 |
28 |
14 |
| T3 |
6 |
2 |
| T4 |
15 |
3 |
| N0 |
47 |
21 |
| N1 |
8 |
1 |
| Atypical Mitotic Figures Absent |
16 |
13 |
| Atypical Mitotic Figures Present |
26 |
4 |
| Invasion of Tumor Capsule Absent |
20 |
10 |
| Invasion of Tumor Capsule Present |
30 |
12 |
| Absent |
35 |
19 |
| Present |
16 |
2 |
| M0 |
41 |
21 |
| M1 |
14 |
1 |
| Cytoplasm presence <= 25% Absent |
7 |
3 |
| Cytoplasm presence <= 25% Present |
35 |
14 |
| Diffuse Architecture Absent |
14 |
5 |
| Diffuse Architecture Present |
29 |
12 |
| Absent |
15 |
11 |
| Present |
36 |
11 |
| Left |
32 |
11 |
| Right |
23 |
11 |
| Lung |
6 |
2 |
| Not Lung |
9 |
0 |
| Liver |
8 |
0 |
| Not Liver |
7 |
2 |
| Mitotic Rate > 5/50 HPF Absent |
19 |
10 |
| Mitotic Rate > 5/50 HPF Present |
30 |
10 |
| Necrosis Absent |
13 |
4 |
| Necrosis Present |
39 |
17 |
| I-II |
11 |
2 |
| III-IV |
33 |
15 |
| NO |
18 |
7 |
| YES |
36 |
14 |
| NO |
4 |
0 |
| YES |
25 |
14 |
| NO |
9 |
7 |
| YES |
2 |
2 |
| NO |
41 |
18 |
| YES |
12 |
3 |
| R0 |
37 |
18 |
| R1+R2 |
14 |
1 |
| Sinusoid Invasion Absent |
22 |
13 |
| Sinusoid Invasion Present |
20 |
4 |
| CR |
19 |
15 |
| PD |
15 |
1 |
| TUMOR FREE |
22 |
18 |
| WITH TUMOR |
32 |
4 |
| Venous Invasion Absent |
26 |
13 |
| Venous Invasion Present |
23 |
7 |
| <median |
31 |
9 |
| >median |
24 |
13 |
Cor_MS_and_Clin_LR <- result_list[[2]];knitr::kable(Cor_MS_and_Clin_LR)
| Female |
32 |
16 |
| Male |
20 |
9 |
| <60 |
41 |
19 |
| >60 |
11 |
6 |
| NOT WHITE |
2 |
0 |
| WHITE |
48 |
17 |
| Stage I |
5 |
4 |
| Stage II |
23 |
14 |
| Stage III |
11 |
5 |
| Stage IV |
13 |
2 |
| T1 |
5 |
4 |
| T2 |
27 |
15 |
| T3 |
6 |
2 |
| T4 |
14 |
4 |
| N0 |
44 |
24 |
| N1 |
8 |
1 |
| Atypical Mitotic Figures Absent |
14 |
15 |
| Atypical Mitotic Figures Present |
25 |
5 |
| Invasion of Tumor Capsule Absent |
19 |
11 |
| Invasion of Tumor Capsule Present |
28 |
14 |
| Absent |
33 |
21 |
| Present |
15 |
3 |
| M0 |
39 |
23 |
| M1 |
13 |
2 |
| Cytoplasm presence <= 25% Absent |
6 |
4 |
| Cytoplasm presence <= 25% Present |
33 |
16 |
| Diffuse Architecture Absent |
12 |
7 |
| Diffuse Architecture Present |
28 |
13 |
| Absent |
14 |
12 |
| Present |
34 |
13 |
| Left |
30 |
13 |
| Right |
22 |
12 |
| Lung |
6 |
2 |
| Not Lung |
8 |
1 |
| Liver |
7 |
1 |
| Not Liver |
7 |
2 |
| Mitotic Rate > 5/50 HPF Absent |
17 |
12 |
| Mitotic Rate > 5/50 HPF Present |
29 |
11 |
| Necrosis Absent |
12 |
5 |
| Necrosis Present |
37 |
19 |
| I-II |
9 |
4 |
| III-IV |
32 |
16 |
| NO |
16 |
9 |
| YES |
35 |
15 |
| NO |
3 |
1 |
| YES |
25 |
14 |
| NO |
9 |
7 |
| YES |
2 |
2 |
| NO |
38 |
21 |
| YES |
12 |
3 |
| R0 |
35 |
20 |
| R1+R2 |
13 |
2 |
| Sinusoid Invasion Absent |
21 |
14 |
| Sinusoid Invasion Present |
18 |
6 |
| CR |
18 |
16 |
| PD |
14 |
2 |
| TUMOR FREE |
20 |
20 |
| WITH TUMOR |
31 |
5 |
| Venous Invasion Absent |
24 |
15 |
| Venous Invasion Present |
22 |
8 |
| <median |
29 |
11 |
| >median |
23 |
14 |
Cor_MS_and_Clin_CR <- result_list[[3]];knitr::kable(Cor_MS_and_Clin_CR)
| Female |
33 |
15 |
| Male |
15 |
14 |
| <60 |
36 |
24 |
| >60 |
12 |
5 |
| NOT WHITE |
2 |
0 |
| WHITE |
45 |
20 |
| Stage I |
4 |
5 |
| Stage II |
20 |
17 |
| Stage III |
11 |
5 |
| Stage IV |
13 |
2 |
| T1 |
4 |
5 |
| T2 |
24 |
18 |
| T3 |
6 |
2 |
| T4 |
14 |
4 |
| N0 |
40 |
28 |
| N1 |
8 |
1 |
| Atypical Mitotic Figures Absent |
15 |
14 |
| Atypical Mitotic Figures Present |
23 |
7 |
| Invasion of Tumor Capsule Absent |
19 |
11 |
| Invasion of Tumor Capsule Present |
25 |
17 |
| Absent |
29 |
25 |
| Present |
15 |
3 |
| M0 |
35 |
27 |
| M1 |
13 |
2 |
| Cytoplasm presence <= 25% Absent |
7 |
3 |
| Cytoplasm presence <= 25% Present |
31 |
18 |
| Diffuse Architecture Absent |
13 |
6 |
| Diffuse Architecture Present |
25 |
16 |
| Absent |
10 |
16 |
| Present |
35 |
12 |
| Left |
27 |
16 |
| Right |
21 |
13 |
| Lung |
5 |
3 |
| Not Lung |
9 |
0 |
| Liver |
8 |
0 |
| Not Liver |
6 |
3 |
| Mitotic Rate > 5/50 HPF Absent |
17 |
12 |
| Mitotic Rate > 5/50 HPF Present |
28 |
12 |
| Necrosis Absent |
10 |
7 |
| Necrosis Present |
36 |
20 |
| I-II |
10 |
3 |
| III-IV |
29 |
19 |
| NO |
14 |
11 |
| YES |
33 |
17 |
| NO |
3 |
1 |
| YES |
23 |
16 |
| NO |
7 |
9 |
| YES |
2 |
2 |
| NO |
35 |
24 |
| YES |
11 |
4 |
| R0 |
31 |
24 |
| R1+R2 |
13 |
2 |
| Sinusoid Invasion Absent |
20 |
15 |
| Sinusoid Invasion Present |
17 |
7 |
| CR |
15 |
19 |
| PD |
15 |
1 |
| TUMOR FREE |
15 |
25 |
| WITH TUMOR |
32 |
4 |
| Venous Invasion Absent |
21 |
18 |
| Venous Invasion Present |
22 |
8 |
| <median |
27 |
13 |
| >median |
21 |
16 |
Cor_MS_and_Clin_PR <- result_list[[4]];knitr::kable(Cor_MS_and_Clin_PR)
| Female |
32 |
16 |
| Male |
20 |
9 |
| <60 |
40 |
20 |
| >60 |
12 |
5 |
| NOT WHITE |
2 |
0 |
| WHITE |
48 |
17 |
| Stage I |
4 |
5 |
| Stage II |
22 |
15 |
| Stage III |
12 |
4 |
| Stage IV |
14 |
1 |
| T1 |
4 |
5 |
| T2 |
26 |
16 |
| T3 |
6 |
2 |
| T4 |
16 |
2 |
| N0 |
44 |
24 |
| N1 |
8 |
1 |
| Atypical Mitotic Figures Absent |
14 |
15 |
| Atypical Mitotic Figures Present |
24 |
6 |
| Invasion of Tumor Capsule Absent |
18 |
12 |
| Invasion of Tumor Capsule Present |
29 |
13 |
| Absent |
31 |
23 |
| Present |
17 |
1 |
| M0 |
38 |
24 |
| M1 |
14 |
1 |
| Cytoplasm presence <= 25% Absent |
7 |
3 |
| Cytoplasm presence <= 25% Present |
31 |
18 |
| Diffuse Architecture Absent |
13 |
6 |
| Diffuse Architecture Present |
26 |
15 |
| Absent |
13 |
13 |
| Present |
35 |
12 |
| Left |
30 |
13 |
| Right |
22 |
12 |
| Lung |
7 |
1 |
| Not Lung |
9 |
0 |
| Liver |
8 |
0 |
| Not Liver |
8 |
1 |
| Mitotic Rate > 5/50 HPF Absent |
17 |
12 |
| Mitotic Rate > 5/50 HPF Present |
29 |
11 |
| Necrosis Absent |
11 |
6 |
| Necrosis Present |
38 |
18 |
| I-II |
10 |
3 |
| III-IV |
30 |
18 |
| NO |
16 |
9 |
| YES |
35 |
15 |
| NO |
3 |
1 |
| YES |
24 |
15 |
| NO |
9 |
7 |
| YES |
2 |
2 |
| NO |
37 |
22 |
| YES |
13 |
2 |
| R0 |
34 |
21 |
| R1+R2 |
14 |
1 |
| Sinusoid Invasion Absent |
20 |
15 |
| Sinusoid Invasion Present |
18 |
6 |
| CR |
16 |
18 |
| PD |
16 |
0 |
| TUMOR FREE |
18 |
22 |
| WITH TUMOR |
33 |
3 |
| Venous Invasion Absent |
22 |
17 |
| Venous Invasion Present |
24 |
6 |
| <median |
28 |
12 |
| >median |
24 |
13 |
Cor_MS_and_Clin_SR <- result_list[[5]];knitr::kable(Cor_MS_and_Clin_SR)
| Female |
23 |
25 |
| Male |
13 |
16 |
| <60 |
28 |
32 |
| >60 |
8 |
9 |
| NOT WHITE |
0 |
2 |
| WHITE |
31 |
34 |
| Stage I |
6 |
3 |
| Stage II |
14 |
23 |
| Stage III |
7 |
9 |
| Stage IV |
9 |
6 |
| T1 |
6 |
3 |
| T2 |
17 |
25 |
| T3 |
3 |
5 |
| T4 |
10 |
8 |
| N0 |
31 |
37 |
| N1 |
5 |
4 |
| Atypical Mitotic Figures Absent |
15 |
14 |
| Atypical Mitotic Figures Present |
15 |
15 |
| Invasion of Tumor Capsule Absent |
14 |
16 |
| Invasion of Tumor Capsule Present |
19 |
23 |
| Absent |
25 |
29 |
| Present |
10 |
8 |
| M0 |
27 |
35 |
| M1 |
9 |
6 |
| Cytoplasm presence <= 25% Absent |
6 |
4 |
| Cytoplasm presence <= 25% Present |
24 |
25 |
| Diffuse Architecture Absent |
9 |
10 |
| Diffuse Architecture Present |
21 |
20 |
| Absent |
9 |
17 |
| Present |
26 |
21 |
| Left |
19 |
24 |
| Right |
17 |
17 |
| Lung |
3 |
5 |
| Not Lung |
7 |
2 |
| Liver |
7 |
1 |
| Not Liver |
3 |
6 |
| Mitotic Rate > 5/50 HPF Absent |
8 |
21 |
| Mitotic Rate > 5/50 HPF Present |
24 |
16 |
| Necrosis Absent |
6 |
11 |
| Necrosis Present |
28 |
28 |
| I-II |
5 |
8 |
| III-IV |
25 |
23 |
| NO |
7 |
18 |
| YES |
28 |
22 |
| NO |
3 |
1 |
| YES |
22 |
17 |
| NO |
7 |
9 |
| YES |
3 |
1 |
| NO |
30 |
29 |
| YES |
5 |
10 |
| R0 |
25 |
30 |
| R1+R2 |
8 |
7 |
| Sinusoid Invasion Absent |
14 |
21 |
| Sinusoid Invasion Present |
15 |
9 |
| CR |
17 |
17 |
| PD |
10 |
6 |
| TUMOR FREE |
14 |
26 |
| WITH TUMOR |
21 |
15 |
| Venous Invasion Absent |
19 |
20 |
| Venous Invasion Present |
13 |
17 |
| <median |
17 |
23 |
| >median |
19 |
18 |
# P-value table
result <- data.frame(Parameter=variable,
Without_pvalue=NA,
Likely_pvalue=NA,
PC_pvalue=NA,
Putative_pvalue=NA,
Strigent_pvalue=NA)
for (i in 1:length(variable)) {
for (j in 1:5) {
name <- variable[i]
clinic <- dplyr::mutate(clinic,MS=clinic[,data_type[j]])
dat <- data.frame(table(clinic[,c('MS',name)])) %>%
ddply(.(MS),transform,percent=Freq/sum(Freq)*100)
colnames(dat)[1:2] <- c('Var1','Var2')
x <- matrix(dat$Freq,ncol=2)
if (sum(x<5)>0){
a <- fisher.test(x)
}else{
a <- chisq.test(x)
}
result[i,j+1] <- round(a$p.value,2)
}
}
result[result<0.05] <- '*'
knitr::kable(result)
| SEX |
1 |
1 |
0.21 |
1 |
0.98 |
| AGE_new |
0.76 |
1 |
0.61 |
0.99 |
1 |
| Ethnicity.Race |
1 |
1 |
1 |
1 |
0.5 |
| Clinical.Stage |
0.19 |
0.29 |
0.09 |
* |
0.3 |
| T.stage |
0.63 |
0.63 |
0.25 |
0.07 |
0.42 |
| Lymph.node |
0.43 |
0.26 |
0.14 |
0.26 |
0.73 |
| ATYPICAL_MITOTIC_FIGURES |
* |
* |
0.08 |
* |
1 |
| CAPSULAR_INVASION |
0.86 |
0.97 |
0.93 |
0.59 |
1 |
| Clinical.Status.3.Mo.Post.Op |
0.07 |
0.15 |
* |
* |
0.68 |
| Distant.Metastasis |
0.05 |
0.12 |
* |
* |
0.39 |
| CYTOPLASM_PRESENCE_LESS_THAN_EQUAL_25_PERCENT |
1 |
0.72 |
1 |
1 |
0.73 |
| DIFFUSE_ARCHITECTURE |
1 |
0.92 |
0.79 |
0.93 |
1 |
| Excessive.Hormone |
0.16 |
0.18 |
* |
0.06 |
0.15 |
| LATERALITY |
0.69 |
0.82 |
1 |
0.82 |
0.78 |
| METASTATIC_SITE_1 |
0.21 |
0.58 |
0.08 |
0.47 |
0.15 |
| METASTATIC_SITE_2 |
0.47 |
1 |
0.21 |
1 |
0.05 |
| MITOTIC_RATE |
0.56 |
0.34 |
0.47 |
0.34 |
* |
| NECROSIS |
0.76 |
0.96 |
0.9 |
1 |
0.43 |
| NUCLEAR_GRADE |
0.32 |
1 |
0.34 |
0.51 |
0.58 |
| PHARMACEUTICAL_TX_ADJUVANT |
1 |
0.79 |
0.55 |
0.79 |
* |
| PHARM_TX_MITOTANE_ADJUVANT |
0.29 |
1 |
1 |
1 |
0.63 |
| PHARM_TX_MITOTANE_THERAPUTIC_AT_REC |
1 |
1 |
1 |
1 |
0.58 |
| RADIATION_TREATMENT_ADJUVANT |
0.53 |
0.36 |
0.38 |
0.12 |
0.36 |
| Surgical.margin |
0.05 |
0.12 |
* |
* |
0.8 |
| SINUSOID_INVASION |
0.14 |
0.36 |
0.43 |
0.26 |
0.15 |
| PRIMARY_THERAPY_OUTCOME |
* |
* |
* |
* |
0.6 |
| Neoplasm.Status |
* |
* |
* |
* |
0.07 |
| WEISS_VENOUS_INVASION |
0.52 |
0.44 |
0.16 |
0.07 |
0.84 |
| TUMOR_WEIGHT_new |
0.33 |
0.47 |
0.46 |
0.81 |
0.58 |
Multivariate COX
result <- data.frame()
res.cox.all <- coxph(formula = Surv(OS.time,OS)~
# Without_combined +
# Likely_combined+
# PC_combined+
Putative_combined+
Clinical.Stage+
# Excessive.Hormone+
ATYPICAL_MITOTIC_FIGURES+
Surgical.margin+
Clinical.Status.3.Mo.Post.Op+
Distant.Metastasis+
# Neoplasm.Status+
PRIMARY_THERAPY_OUTCOME,
data = clinic)
x <- summary(res.cox.all)
result_cox <- data.frame(HR=signif(x$conf.int[,1],digits=2),
HR.confit.lower=signif(x$conf.int[,"lower .95"],2),
HR.confit.upper=signif(x$conf.int[,"upper .95"],2),
p.value =signif(x$coefficients[,"Pr(>|z|)"],digits = 3))
result_cox <- result_cox %>%
mutate('HR(95%CI)'=paste(.$HR,"(",result_cox$HR.confit.lower,"-",.$HR.confit.upper,")",sep = "")) %>%
dplyr::mutate(p.value=as.numeric(.$p.value)) %>%
dplyr::mutate(p.value= ifelse(.$p.value<0.001,'***',ifelse(.$p.value<0.01,'**',ifelse(.$p.value<0.05,'*','NS'))))%>%
tibble::rownames_to_column(var = 'Parameter')
result_cox[,2:4] <- lapply(result_cox[,2:4],as.numeric)
result <- rbind(result,result_cox)
dat <- result
dat$` ` <- paste(rep(" ", nrow(dat)), collapse = " ")
dat$Parameter <- ifelse(is.na(dat$HR),
dat$Parameter,
paste0(" ", dat$Parameter))
dat <- dat[,c(1,7,2:6)]
tm <- forest_theme(base_size = 15, #文本的大小
footnote_col = "red")
forest(dat[,c(1:2,6:7)],
est = dat$HR,
lower = dat$HR.confit.lower,
upper = dat$HR.confit.upper,
sizes = 1,
ci_column = 2,
ref_line = 1,
# arrow_lab = c("Placebo Better", "Treatment Better"),
xlim = c(0, 100),
ticks_at = c(1,50),
footnote = "",
theme = tm)

AUC of signature microbiome
micro_sig <- read.xlsx('./01clinical_analysis/source_data/Core_features_coefficients.xlsx',sheet = 2,colNames = FALSE,check.names = F)
microbiome_NR <- read.csv(file = './01clinical_analysis/source_data/Voom-SNM-ACC.csv',header = T,row.names = 1) %>% .[,micro_sig$X2] %>% set_colnames(str_sub(colnames(.),str_locate(colnames(.),'g__')[,1],str_length(colnames(.))))%>% tibble::rownames_to_column(var = 'names')
microbiome_LR <- read.csv(file = './01clinical_analysis/source_data/Voom-SNM-Filter-Likely-ACC.csv',header = T,row.names = 1) %>% .[,micro_sig$X2] %>% set_colnames(str_sub(colnames(.),str_locate(colnames(.),'g__')[,1],str_length(colnames(.))))%>% tibble::rownames_to_column(var = 'names')
microbiome_CR <- read.csv(file = './01clinical_analysis/source_data/Voom-SNM-Filter-Plate_Center-ACC.csv',header = T,row.names = 1) %>% .[,micro_sig$X2] %>% set_colnames(str_sub(colnames(.),str_locate(colnames(.),'g__')[,1],str_length(colnames(.))))%>% tibble::rownames_to_column(var = 'names')
microbiome_PR <- read.csv(file = './01clinical_analysis/source_data/Voom-SNM-Filter-Putative-ACC.csv',header = T,row.names = 1) %>% .[,micro_sig$X2] %>% set_colnames(str_sub(colnames(.),str_locate(colnames(.),'g__')[,1],str_length(colnames(.))))%>% tibble::rownames_to_column(var = 'names')
clinic.NR <- merge(clinic,microbiome_NR,by='names')
clinic.LR <- merge(clinic,microbiome_LR,by='names')
clinic.CR <- merge(clinic,microbiome_CR,by='names')
clinic.PR <- merge(clinic,microbiome_PR,by='names')
# AUC curve
roc_result <- data.frame(genus=micro_sig$X1,
NR=NA,
LR=NA,
CR=NA,
PR=NA)
clinic_type <- list(clinic.NR,clinic.LR,clinic.CR,clinic.PR)
data_type <- c('NR','LR','CR','PR')
for (i in 1:4) {
signature_combine <- psm(Surv(OS.time,OS) ~ g__Ilyobacter+g__Isosphaera+g__Singulisphaera+g__Thermopetrobacter+g__Chelativorans+g__Pseudorhodobacter+g__Wenxinia+g__Acidocella+g__Diaphorobacter+g__Vitreoscilla+g__Bacteriovorax+g__Desulfuromonas+g__Proteus+g__Terrimicrobium+g__Haloferula,
data = clinic_type[[i]], dist='lognormal')
clinic$signature_combine <- predict(signature_combine)[match(clinic$names,clinic_type[[i]]$names)]
colnames(clinic)[which(colnames(clinic)=='signature_combine')] <- paste0('signature_combine_',data_type[i])
}
roc.list <- roc(OS ~ signature_combine_NR+signature_combine_LR+signature_combine_CR+signature_combine_PR,
smooth=T,
data = clinic)
labels=c()#blank list
for(i in 1:4){
x=c('15 signature combine(NR), AUC=','15 signature combine(LR), AUC=','15 signature combine(CR), AUC=','15 signature combine(PR), AUC=')
x.1 <- paste0(x[i],round((roc.list[[i]])$auc,2))
labels <- c(labels,x.1)
};labels
## [1] "15 signature combine(NR), AUC=0.84" "15 signature combine(LR), AUC=0.83"
## [3] "15 signature combine(CR), AUC=0.82" "15 signature combine(PR), AUC=0.83"
ggroc(roc.list,linetype = 1, size = 1.2) +
scale_colour_manual(name = "",
labels =labels,
values=pal_npg()(4))+
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.border = element_rect(fill=NA,color="black", size=1.2, linetype="solid"))+
geom_segment(aes(x = 1, xend = 0, y = 0, yend = 1),
color="grey", linetype="dashed")

Time dependent ROC
clinic.roc <- clinic %>%
dplyr::mutate(OS.time= round(.$OS.time/30,2)) %>%
dplyr::mutate(NR=ifelse(.$Without_combined=='MS2',1,0)) %>%
dplyr::mutate(LR=ifelse(.$Likely_combined=='MS2',1,0)) %>%
dplyr::mutate(CR=ifelse(.$PC_combined=='MS2',1,0)) %>%
dplyr::mutate(PR=ifelse(.$Putative_combined=='MS2',1,0)) %>%
dplyr::mutate(stage.new=ifelse(.$Clinical.Stage=='Stage IV',3,
ifelse(.$Clinical.Stage=='Stage III',2,
ifelse(.$Clinical.Stage=='Stage II',1,0)))) %>%
dplyr::mutate(T.stage.new=gsub('T','',.$T.stage)) %>%
dplyr::mutate(T.stage.new=as.numeric(.$T.stage.new)-1)
res.cox.stage <- coxph(Surv(OS.time,OS)~T.stage.new,data=clinic.roc)
clinic.roc$cox.stage <- predict(res.cox.stage)
ROC.stage <- timeROC(T=clinic.roc$OS.time,delta=clinic.roc$OS,marker=clinic.roc$cox.stage,iid = T,cause=1,weighting="marginal",times=quantile(clinic.roc$OS.time,probs=seq(0.1,0.9,0.05)))
plotAUCcurve(ROC.stage,col = 'grey')
palette <- pal_npg()(6)
pval <- data.frame()
for (i in 1:4) {
data_type=c("Without_combined","Likely_combined","PC_combined","Putative_combined")
clinic.roc <- dplyr::mutate(clinic.roc,MS=clinic.roc[,data_type[i]])
res.cox <- coxph(Surv(OS.time,OS)~MS+T.stage.new,
data=clinic.roc)
clinic.roc$TYPE <- predict(res.cox)
ROC <- timeROC(T=clinic.roc$OS.time,delta=clinic.roc$OS,marker=clinic.roc$TYPE,iid = T,cause=1,weighting="marginal",times=quantile(clinic.roc$OS.time,probs=seq(0.1,0.9,0.05)))
print(plotAUCcurve(ROC,add = T,col = palette[i]))
outcome <- compare(ROC,ROC.stage,adjusted = T,abseps = 1e-06)
table <- as.data.frame(outcome$p_values_AUC)
pval <- rbind(pval,table)
}
## NULL
## NULL
## NULL
## NULL
abline(v=22.07,lwd=1,lty=2,col="#BC3C29FF")#add a vertical line
abline(v=26.96,lwd=1,lty=2,col="#BC3C29FF")
abline(v=31.894,lwd=1,lty=2,col="#BC3C29FF")
abline(v=98.408,lwd=1,lty=2,col="#BC3C29FF")
legend("bottomleft",c('MS(NR)+Stage','MS(LR)+Stage','MS(CR)+Stage','MS(PR)+Stage','Stage Only'),col=c(pal_npg()(4),'grey'),lty=1,lwd=3)

Random forest
microbiome_NR <- read.csv(file = './01clinical_analysis/source_data/Voom-SNM-ACC.csv',header = T,row.names = 1)
microbiome_LR <- read.csv(file = './01clinical_analysis/source_data/Voom-SNM-Filter-Likely-ACC.csv',header = T,row.names = 1)
microbiome_CR <- read.csv(file = './01clinical_analysis/source_data/Voom-SNM-Filter-Plate_Center-ACC.csv',header = T,row.names = 1)
microbiome_PR <- read.csv(file = './01clinical_analysis/source_data/Voom-SNM-Filter-Putative-ACC.csv',header = T,row.names = 1)
variable <- c('Clinical.Stage',
'ATYPICAL_MITOTIC_FIGURES',
'Clinical.Status.3.Mo.Post.Op',
'Distant.Metastasis',
'Excessive.Hormone',
'Surgical.margin',
'PRIMARY_THERAPY_OUTCOME',
'Neoplasm.Status')
result <- data.frame(parameter=variable,NR=NA,LR=NA,CR=NA,PR=NA)
for (i in 1:4) {
dat <- list(microbiome_NR,microbiome_LR,microbiome_CR,microbiome_PR)[[i]]
for (j in 1:length(variable)) {
mat <- dat %>% dplyr::mutate(group=as.factor(clinic[,variable[j]])) %>% drop_na(group)
set.seed(430)
rf = randomForest(group~ .,data=mat, ntree=500, importance=TRUE, proximity=TRUE)
res1 <- as.data.frame(rf$importance) %>% .[order(.$MeanDecreaseAccuracy,decreasing = T),]
res2 <- as.data.frame(rf$importance) %>% .[order(.$MeanDecreaseGini,decreasing = T),]
x <- rownames(res1)[1:10] %>% str_sub(.,str_locate(.,'g__')[,1],str_length(.))
y <- rownames(res2)[1:10]%>% str_sub(.,str_locate(.,'g__')[,1],str_length(.))
result[j,i+1] <- paste(intersect(x,y),collapse=";")
}
}
knitr::kable(result)
| Clinical.Stage |
g__Blautia;g__Devosia;g__Prevotella;g__Flavonifractor |
g__Blautia;g__Clostridium;g__Chlamydia;g__Erysipelothrix;g__Lachnoclostridium |
g__Blautia;g__Candidatus_Jettenia;g__Shinella |
g__Blautia;g__Flavonifractor;g__Aquitalea;g__Rhodoferax |
| ATYPICAL_MITOTIC_FIGURES |
g__Methanospirillum;g__Marinitoga |
g__Pseudohongiella;g__Marinitoga;g__Syntrophorhabdus;g__Arenimonas;g__Arcobacter;g__Halorhodospira |
g__Marinitoga;g__Arcobacter;g__Ruania;g__Idiomarina;g__Methanospirillum |
g__Methanospirillum;g__Marinitoga;g__Candidatus_Methanoperedens |
| Clinical.Status.3.Mo.Post.Op |
g__Raoultella;g__Proboscivirus;g__Bizionia;g__T7likevirus;g__Crocosphaera;g__Arthrobacter;g__Oceanicaulis |
g__Blautia;g__Bacteroides;g__Tenacibaculum;g__Spo1virus |
g__T4likevirus;g__Crocosphaera;g__Stigmatella |
g__Cylindrospermopsis;g__Cytomegalovirus;g__Blautia |
| Distant.Metastasis |
g__Microvirga;g__Candidatus_Jettenia;g__Bacteroides;g__Denitrovibrio;g__Cosenzaea |
g__Shewanella;g__Bacteroides;g__Spo1virus;g__Citrobacter |
g__Candidatus_Jettenia;g__Microvirga;g__Spo1virus;g__Bacteroides;g__Neochlamydia;g__Oscillatoria |
g__Spo1virus;g__Halorhodospira;g__Salinispora;g__Pseudoclavibacter;g__Sodalis |
| Excessive.Hormone |
g__Spiroplasma;g__Marichromatium;g__Entomoplasma;g__Pantoea;g__Leucobacter |
g__Algiphilus;g__Streptacidiphilus;g__Marichromatium;g__Oceanobacillus;g__Caldisericum |
g__Moraxella;g__Marichromatium;g__Spiroplasma |
g__Marichromatium;g__Streptacidiphilus;g__Spiroplasma;g__Pantoea;g__Tannerella |
| Surgical.margin |
g__Sodalis;g__Blattabacterium;g__Proboscivirus;g__Ascovirus;g__Plesiomonas |
g__Mamastrovirus;g__Geobacillus;g__Blautia;g__Thiorhodovibrio |
g__Methanosarcina;g__Blattabacterium;g__Proboscivirus;g__Methanocella;g__Sodalis;g__Geobacillus |
g__Proboscivirus;g__Sodalis |
| PRIMARY_THERAPY_OUTCOME |
g__Syntrophus;g__Azospira;g__Proboscivirus |
g__Jonesia;g__Agrobacterium;g__Diplosphaera;g__Proboscivirus;g__Hydrogenophaga |
g__Rathayibacter;g__Proboscivirus;g__Hydrogenophaga;g__Spiribacter;g__Diplosphaera |
g__Parabacteroides;g__Caedibacter;g__Proboscivirus |
| Neoplasm.Status |
g__Fimbriimonas;g__Avibacterium;g__Agromyces;g__Pedobacter;g__Leptospira;g__Moraxella |
g__Desulfobulbus;g__Spo1virus;g__Leptospira;g__Fimbriimonas;g__Psychrilyobacter |
g__Fimbriimonas;g__Avibacterium;g__Caldimonas;g__Alphabaculovirus;g__Spo1virus |
g__Caldimonas;g__Spiroplasma;g__Blastomonas;g__Parabacteroides |
MS & PHENOTYPE
clinic$mRNA_K4_new <- ifelse(str_detect(clinic$mRNA_K4,'proliferation'),NA,clinic$mRNA_K4)
pheno <- c('Histology','C1A_C1B','mRNA_K4_new','miRNA_cluster','MethyLevel','SCNA_cluster','protein_cluster','COC')
thm <- theme_bw()+
theme(panel.border = element_rect(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black",size=0.5),
text = element_text(size = 20),
axis.text.x = element_text(size=25,color ='black'),
axis.text.y = element_text(size = 20,color = 'black'),
axis.title.y = element_text(size = 20),
plot.subtitle = element_text(face = 'italic',hjust = 0.5),
legend.position='bottom',
legend.text = element_text(size = 15,hjust = 0),
legend.title = element_text(size = 20)
)
# plot for phenos
data_type <- c("Without_combined","Likely_combined","PC_combined","Putative_combined")
for (j in 1:4) {
clinic$MS <- clinic[,data_type[j]]
for (i in 1:length(pheno)) {
dat <- data.frame(table(clinic[,c('MS',pheno[i])])) %>%
ddply(.(MS),transform,percent=Freq/sum(Freq)*100)
colnames(dat)[1:2] <- c('Var1','Var2')
x <- matrix(dat$Freq,ncol=2)
if (sum(x<5)>0){
pvalue <- fisher.test(x)$p.value
# print(pvalue)
}else{
pvalue <- chisq.test(x)$p.value
# print(pvalue)
}
legend_n <- length(unique(dat$Var2))
p <- dat%>%
drop_na() %>%
ggplot(aes(fill=Var2,x=Var1,y=Freq))+
geom_bar(position = 'fill',stat = 'identity',width = 0.8)+
guides(fill=guide_legend(title=pheno[i],direction = "horizontal",
nrow=ifelse(legend_n>4,3,ifelse(legend_n>1,2,1)))) +
scale_fill_manual(values = c(pal_nejm(alpha = 0.9)(3)[2:3],pal_nejm(alpha = 0.9)(1),pal_nejm(alpha = 0.9)(8)[4:8]))+
scale_y_continuous(labels = scales::percent)+
coord_fixed(ratio = 5/2)+
labs(x='',y='Proportion %',title = data_type[j],
subtitle = paste0("P ", ifelse(pvalue<0.001, "< 0.001", paste0("= ",round(pvalue,3)))))+
thm
print(p)
# dev.off()
}
}
































# MS & ADS
for (i in 1:4) {
clinic$MS <- clinic[,data_type[i]]
p <- ggplot(data = clinic,aes(x=MS, y=ADS)) +
geom_violin(trim=T) +
geom_jitter(shape=16, color="grey",size=2.0,position=position_jitter(0.2))+
geom_boxplot(width=0.1,position=position_dodge(0.8),aes(fill=MS))+
thm+
theme(legend.position = 'right')+
scale_fill_manual(values = c(MS1="#0072B5CC",MS2="#E18727CC"))+
guides(fill=guide_legend(direction = "horizontal",ncol =1))+
labs(x="", y = "ADS ", fill = "",title = data_type[i])+
stat_compare_means(aes(x = MS , y = ADS),label = "p.format",size=6)
print(p)
}



